function [IMDS,SLC,T] = dataHomAug(sigIn,SigSegmDataIn,LIn,TIn,ParamWN)
%dataHomAug - Data homogeneization and augmentation
%
% [IMDS,SLC,T] = dataHomAug(sigIn,SigSegmDataIn,LIn,TIn,ParamWN)
%
% This function allows the homogeneization (in the sense of homogeization
% of cardinality of the corresponding sets) and, where necessary, the data 
% augmentation for the time series of rainfall and, possibly, velocity by
% carrying out these operations:
%   - choice of amount of time series to be used for CNN training for each
%     level;
%   - random selection of time series of the lower level, which typically
%     is overrepresented;
%   - data augmentation for the time series of higher levels, which are
%     underrepresented, including generation of scalogram-based images;
%   - transfer of available/generated images to the corresponding output
%     folders;
%   - generation of the datastores for the subsequent CNN training.
% 
% The computations are carried according to these input arguments:
%   sigIn        : input time series, where it is either sigIn = [t R V]
%                  or sigIn = [t V]; 
%   SigSegmDataIn: variable provided by a session of PluVelScalogram, where
%                       SigSegmDataIn = {tRef,t1,t2,filenaIn}; 
%   LIn, TIn     : variable  LIn = {tRef,tIn,tFin,ClassTS} and table TIn
%                  provided by a session of trendClass.
%
% Moreover, these fields from ParamWN are used:
%
%   sigmaDA      : standard deviation of rainfall and velocity to be used
%                  in data augmentation. If length(sigmaDA) == 2, it is
%                  sigmaR = sigmaDa(1) and sigmaV = sigmaDA(2). If sigmaDA
%                  is a scalar, it is sigmaR = sigmaDA. Please note that
%                  sigmaDA must be coherent with the sigIn.
%   FoldOut      : general folder of output data. For each level Lk (k=1:4 
%                  or k=1:7, depending on the number of classes represented 
%                  in ClassTS), a nested folder whose name is Lk is generated
%                  and the corresponding output scalogram images are placed
%                  here.
%
% If ParamWN is a char variable, it is supposed to be the name of a file
% having a field named 'ParamWN'. If the file loading or the extraction
% of the ParamWN value is not successful, it is ParamWN = [].
% If ParamWN is undefined or empty, a session of DefParam runs to generate
% ParamWN.
%
% Oputput variables:
%   IMDS : output images datastore to be used for the CNN training;
%   SLC  : cell array such that SLC(h) is the cell array
%                   SLC(h) = {tRef,tIn,tFin,t1,t2,filena},
%          where tRef is the reference time, tIn and tFin are the initial
%          and final points of the times series segments used for trend 
%          classification, t1 and t2 are the initial and final times of the
%          time series used for rainfall scalogram generation (if velocity
%          data are also used, the corresponding initial and final point are 
%          t1 and tRef), and filena are the filenames of the scalogram 
%          images placed in the output folders (in case of data augmentation, 
%          the generated files have suffix, before the extension, "_m",
%          where m is the number of the copy of the time series segment to
%          which random noise is added, m >=1);
%   T    : table summarizing the result. For each level, the number of 
%          outcomes and the corresponding percentages are shown.  
%                                
% See also PluVelInspect, PluVelScalogram, trendClass, complArrGen, 
%       dataTVT, TRLearnPluVel.
%
% [IMDS,SLC,T] = dataHomAug(sigIn,SigSegmDataIn,LIn,TIn,ParamWN)

% G. Teza, 2020

if nargin < 5
    ParamWN = [];
end
if ischar(ParamWN)
    try 
        sv = load(ParamWN);
        ParamWN = sv.ParamWN;
    catch
        ParamWN = [];
    end
end
if isempty(ParamWN)
    ParamWN = DefParam;
end
FoldOut = ParamWN.FoldOut;
sigmaDA = ParamWN.SigmaDA;

Wavelet = ParamWN.Wavelet;
F0 = ParamWN.F0;
Sigmat = ParamWN.Sigmat;

t = sigIn(:,1);
R = sigIn(:,2);
if size(sigIn,2) == 2
    V = []; 
else 
    V = sigIn(:,3);
end
Fs = 1/mean(diff(t(1:10)));     % sampling frequency

fprintf('\nSummary about time series classification:\n\n'); 
disp(TIn);
ncl = size(TIn,1);

SLCIn = complArrGen(SigSegmDataIn,LIn,ncl);

%% Interactive choice of level fractions 

% Options about inputdlg: 
options.Resize = 'on';
options.WindowStyle = 'normal';
options.Interpreter = 'tex';
% inputdlg management:
nameDB = 'CHOICE OF AMOUNT OF OUTPUT DATA';
numlinDB = 1;
promptDB  = cell(ncl-1,1);
defaultDB = cell(ncl-1,1);
T2In = TIn{2:end,2};
T2Cons = T2In;
if ncl == 8
    T2ConsRef = max([round(T2In(1)/5) T2In(3)]);
    T2Cons([1 4:6]) = T2ConsRef;
else
    T2ConsRef = max([round(T2In(1)/2) T2In(3)]);
    T2Cons(:) = T2ConsRef;
end
for k = 1:(ncl-1)
    promptDB(k)  = {sprintf('L_%d: total %d, suggested choice %d',k,T2In(k),T2Cons(k))};
    defaultDB(k) = {num2str(T2Cons(k))};
end   
answerDB = inputdlg(promptDB,nameDB,numlinDB,defaultDB,options);

% % Output folders
% if isempty(FoldOut)
%     nameCFO = 'OUTPUT FOLDERS';
%     promptCFO = {'Commont part of output foldernames:'};
%     file1 = SigSegmDataIn{1,4};
%     [pathIn,~,~] = fileparts(file1);
%     defaultCFO = {pathIn};
%     answerCFO = inputdlg(promptCFO,nameCFO,1,defaultCFO,options);
%     FoldOut = answerCFO{1};
% end

sigmaR = sigmaDA(1);
if numel(sigmaDA) == 2
    sigmaV = sigmaDA(2);
else
    sigmaV = [];
end

% Output table inizialization
if ncl == 8
    INA = [1 2 3 7];    % indices of levels not to be augmented 
    nna = 4;
    IDA = [4 5 6];      % indices of levels to be augmented 
    VL = zeros(7,1);    
else
    INA = 1;
    nna = 1;
    IDA = [2 3 4];
    VL = zeros(4,1);
end
CL = TIn{2:end,1};

SLC = cell(ncl,1);

% Test of folders and possible folder generation
if exist(FoldOut,'dir') ~= 7
    mkdir(FoldOut);
end
for k = 1:ncl
    pathk = fullfile(FoldOut,['L' num2str(k)]);
    if exist(pathk,'dir') ~= 7
        mkdir(pathk);
    else
        delete(fullfile(pathk,'*'));    % to empty the output folders to
            % avoid problems due to multiple executions of this function
    end
end

%% Non-augmented data (reduction or 1-1 and image move)

for h = 1:nna
    nh = INA(h);
    SLCInh = SLCIn(nh);     % 1-by-1 cell
    SLCInh = SLCInh{:};     % cell matrix
    if nh == 1
        nLh = round(str2double(answerDB{1}));
        if nLh < T2In(1)
            I1 = sort(randperm(T2In(1),nLh));   % random choice of L1 events
            SLCh = SLCInh(I1,:);    % selected L1-s
        else
            SLCh = SLCInh;
        end
    else
        nLh  = T2In(nh); 
        SLCh = SLCInh;
    end
    pathh = fullfile(FoldOut,['L' num2str(nh)]);
    for k = 1:nLh
        fileInk = SLCh{k,6};
        [~,namefk,extk] = fileparts(fileInk);
        copyfile(fileInk,fullfile(pathh,[namefk extk]));
    end
    SLCh = {SLCh};          % 1-by-1 cell
    SLC(nh) = SLCh;
    VL(nh) = nLh;
end

%% Augmented data

tRef0 = SigSegmDataIn{1,1};
tR10  = SigSegmDataIn{1,2};
tR20  = SigSegmDataIn{1,3};
sizeIm = size(imread(SigSegmDataIn{1,4}));
sizeIm = sizeIm(1:2);

% Filter bank (applicable only if default 'amor' is used)
if strcmp(Wavelet,'amor')
    % CWT filter bank for pluvio data
    lR = numel(tR10:1/Fs:tR20);
    cwtfbR = cwtfilterbank(...
        'SignalLength',lR,...
        'Wavelet','amor',...
        'VoicesPerOctave',24,...
        'samplingFrequency',Fs);
    % CWT filter bank for velocity data (if applicable)
    if ~isempty(V)
        lV = numel(tR10:1/Fs:tRef0);
        cwtfbV = cwtfilterbank(...
            'SignalLength',lV,...
            'Wavelet','amor',...
            'VoicesPerOctave',24,...
            'samplingFrequency',Fs);
    else
        lV = 0;
    end
end

cmap = jet(128);    % scalogram colormap
for h = 1:3      
    IDAh = IDA(h);
    Nrh = round(str2double(answerDB{IDAh}));    % Required number of Lh-s  
    Nch = Nrh-T2In(IDAh);                       % Computation to be carried out
    Nph = floor(Nch/T2In(IDAh));    % Number of complete paths in T2In(IDAh)
    Nrph = rem(Nch,T2In(IDAh));     % remaining computations
    SLCInh = SLCIn(IDAh);
    SLCInh = SLCInh{:};
    pathh = fullfile(FoldOut,['L' num2str(IDAh)]);
    hwb = waitbar(0,sprintf('Data augmentation L%d in progress...',IDAh));
    mwb = 1;    % Count for waitbar
    k = 0;      % Count for paths
    nsh = 1;    % Count for elements of SLCh
    SLCh = cell(Nrh,6);
    while k <= Nph
        if k < Nph
            Nsk = T2In(IDAh);       % Number of files of a complete path 
        else
            Nsk = Nrph;
        end
        for kk = 1:Nsk
            fileInkk = SLCInh{kk,6};
            [~,namefkk,extkk] = fileparts(fileInkk);
            if k == 0               % first while cycle (k=0): no augmentation
                copyfile(fileInk,fullfile(pathh,[namefkk extkk]));
                SLCh(nsh,:) = SLCInh(kk,:);
                nsh = nsh+1;            % count advance in this case
            else                
                t1kk  = SLCInh{kk,4};
                t2Rkk = SLCInh{kk,5};
                t2Vkk = SLCInh{kk,1};
                [~,n1kk]  = min(abs(t-t1kk));
                [~,n2Rkk] = min(abs(t-t2Rkk));
                Rkk = R(n1kk:n2Rkk);
                Rkk = Rkk+sigmaR*randn(size(Rkk));  % data augmentation
                if ~isempty(V)
                    [~,n2Vkk] = min(abs(t-t2Vkk));
                    Vkk = V(n1kk:n2Vkk);
                    Vkk = Vkk+sigmaV*randn(size(Vkk));
                else
                    Vkk = [];
                end
                if numel(Rkk) == lR && numel(Vkk) == lV
                    if strcmp(Wavelet,'amor') % Computation by using the precomputed filter banks:
                        [cfsRkk,~] = cwtfbR.wt(Rkk);
                        if ~isempty(V)
                            [cfsVkk,~] = cwtfbV.wt(Vkk);
                        end
                    else
                        Omega0 = 2*pi*F0;
                        [cfsRkk,~] = contwt(Rkk,1/Fs,[],0.13,[],[],'morlet',[Omega0 Sigmat]);
                        if ~isempty(V)
                            [cfsVkk,~] = contwt(Vkk,1/Fs,[],0.13,[],[],'morlet',[Omega0 Sigmat]);
                        end
                    end
                    % Image generation ad saving:
                    filenakk = fullfile(pathh,[namefkk '_' num2str(k) extkk]);
                    imRkk = ind2rgb(im2uint8(rescale(abs(cfsRkk))),cmap);
                    if ~isempty(V)
                        imVkk = ind2rgb(im2uint8(rescale(abs(cfsVkk))),cmap);
                        if t2Rkk > t2Vkk    % zero padding of velocity scalogram, if necessary
                            MZeroPad = zeros(size(imVkk,1),size(imRkk,2)-size(imVkk,2),3);
                            imVkk = cat(2,imVkk,MZeroPad);
                        end
                        imkk = cat(1,imRkk,imVkk);
                    else
                        imkk = imRkk;
                    end
                    imkk = imresize(imkk,sizeIm);
                    imwrite(imkk,filenakk);
                    SLChkk = {t2Vkk,SLCInh{kk,2},SLCInh{kk,3},t1kk,t2Rkk,filenakk};
                    SLCh(nsh,:) = SLChkk;
                    waitbar(mwb/Nch,hwb);
                    mwb = mwb+1;
                    nsh = nsh+1;        % count increment in this case 
                end
            end
        end
        k = k+1;
    end
    close(hwb);             % waitbar closing
    if nsh <= Nrh
        SLCh(nsh-1:end,:) = [];
        Nrhe = size(SLCh,1);
    else
        Nrhe = Nrh;
    end
    SLCh = {SLCh};          % 1-by-1 cell
    SLC(IDAh) = SLCh;
    VL(IDAh) = Nrhe;
end

%% Final computations

IMDS = imageDatastore(FoldOut,...
    'IncludeSubfolders',true,...
    'LabelSource','FolderNames');

nf = sum(VL);
PL = VL/nf*100;
T = table(CL,VL,PL,'VariableNames',{'classification','number','percentage'});
fprintf('\nSummary after data augmentation:\n\n');
disp(T);      